单细胞学习

空间转录组---STEEL+Seurat

2023-03-11  本文已影响0人  jjjscuedu

今天我们学习另外一个兰花空转文章提到的一个方法:STEEL。好像还是个预印版的文章,但是不影响使用。

这个帖子还是将继续使用兰花空间转录组的数据,同时运用文献中提到的STEEL聚类方法进行分析。

==========下载========

官方地址:https://steel-st.sourceforge.io/

=========使用说明==========

使用说明:

必须的几个参数:

expression_matrix:表达矩阵

space_matrix:空间信息

out_prefix:输出前缀

关键的几个参数:

--beads   --genes   --pca   --group

=======运行测试数据========

#注意,STEEL运行,如果指定的是表达值的文件夹的话,它是去找:

read file: Slide_1.1/filtered_feature_bc_matrix/barcodes.tsv

2379 beads loaded

read file: Slide_1.1/filtered_feature_bc_matrix/features.tsv

28903 genes loaded

read file: Slide_1.1/filtered_feature_bc_matrix/matrix.mtx

#这3个文件,因为默认的是压缩的,所以需要先解压一下,不然找不到,作者可能后面要修改一下这个地方了。

steel Slide_1.1/filtered_feature_bc_matrix/ Slide_1.1/spatial/tissue_positions_list.csv pca20out --pca=20

输出结果如下图所示,genes文件里面包含的就是聚类特异基因,map文件就是聚类信息,我们使用pca20out.map.40(文中也选取的是40个cluster)的聚类信息进行后续的分析。

======seurat显示聚类结果======

library(Seurat)

library(dplyr)

library(ggplot2)

library(magrittr)

library(cowplot)

library(gtools)

library(stringr)

library(Matrix)

library(tidyverse)

library(patchwork)

#导入原始Slide1的数据

orc1<- Load10X_Spatial("Slide_1")

## 直接导入STEEL的聚类信息

STEEL<- read.delim("STEEL_out/pca20out.map.40",row.names = 1)

orc1[["seurat_clusters"]] <- NA

clusters<-data.frame(STEEL$Cluster)

rownames(clusters) <- rownames(STEEL)

orc1[["seurat_clusters"]][rownames(clusters),] <- clusters

#好像STEEL中仅仅还有2232个cells,比原始的数据是少的,可能STEEL经过了过滤。所以我们需要只取出这2232个进行后续的分析。

## 去除NA值只保留有聚类的

orc1_new <- orc1[,rownames(clusters)]

Idents(orc1_new) <- 'seurat_clusters'

Idents(orc1_new) <- factor(Idents(orc1_new),levels=mixedsort(levels(Idents(orc1_new))))

#下面就是进行显示了。

orc1_new <- SCTransform(orc1_new, assay = "Spatial", return.only.var.genes = FALSE, verbose = FALSE)

orc1_new <- RunPCA(orc1_new, features = VariableFeatures(orc1_new))

orc1_new <- RunTSNE(orc1_new, dims = 1:20)

p1 <- DimPlot(orc1_new, reduction = "tsne", label = TRUE)

p2 <- SpatialDimPlot(orc1_new, group.by = "seurat_clusters",label.size = 3, pt.size.factor = 1.3)

pearplot <- plot_grid(p1,p2)

ggsave("tsne_Slide1_40.pdf", plot = pearplot, width = 6, height = 6)

========拟时序分析=======

library(monocle)

#在兰花这个paper中,有2块关于拟时序分析的,例如figure 2和Figure 4。我们简单测试一下figure 2的结果。

subdata <- subset(orc1_new, idents = c(19,21,37,38,39,40))

#选择要分析的亚群

expression_matrix = subdata@assays$Spatial@counts

cell_metadata <- data.frame(group = subdata[['orig.ident']],clusters = Idents(subdata))

gene_annotation <- data.frame(gene_short_name = rownames(expression_matrix), stringsAsFactors = F)

rownames(gene_annotation) <- rownames(expression_matrix)

pd <- new("AnnotatedDataFrame", data = cell_metadata)

fd <- new("AnnotatedDataFrame", data = gene_annotation)

HSMM <- newCellDataSet(expression_matrix,

                      phenoData = pd,

                      featureData = fd,

                      expressionFamily=negbinomial.size())

HSMM <- detectGenes(HSMM,min_expr = 0.1)

HSMM_myo <- estimateSizeFactors(HSMM)

HSMM_myo <- estimateDispersions(HSMM_myo)

disp_table <- dispersionTable(HSMM_myo)

disp.genes <- subset(disp_table, mean_expression >= 0.1 )

disp.genes <- as.character(disp.genes$gene_id)

HSMM_myo <- reduceDimension(HSMM_myo, max_components = 2, method = 'DDRTree')

HSMM_myo <-orderCells(HSMM_myo,reverse = T)

#State轨迹分布图

plot1 <- plot_cell_trajectory(HSMM_myo, color_by = "State")

##Cluster轨迹分布图

plot2 <- plot_cell_trajectory(HSMM_myo, color_by = "clusters")

##Pseudotime轨迹图

plot3 <- plot_cell_trajectory(HSMM_myo, color_by = "Pseudotime")

plotc <- plot1|plot2|plot3

ggsave("Combination1.pdf", plot = plotc, width = 18, height = 6.2)

空间转录组的好处就在于可以把拟时结果体现在我们的组织切片上,这样我们在orderCells这一步可以更加方便的判断每个spot的拟时间。

#绘制拟时间

cell_Pseudotime <- data.frame(pData(HSMM_myo)$Pseudotime)

rownames(cell_Pseudotime) <- rownames(cell_metadata)

#把拟时间对应到到组织切片位置上

orc1_new[['Pseudotime']] <- NA

orc1_new[['Pseudotime']][rownames(cell_Pseudotime),] <- cell_Pseudotime

p1 <- SpatialFeaturePlot(orc1_new, features = c("Pseudotime"),pt.size.factor = 1.3)

ggsave("pseudotime_feature1.pdf", plot = p1, width = 8, height = 9)

#highlight某些基因在拟时序上面的表达值

data_subset <- HSMM_myo['PAXXG054350',]

p1<-plot_genes_in_pseudotime(data_subset, color_by = "clusters")

data_subset <- HSMM_myo['PAXXG051950',]

p2<-plot_genes_in_pseudotime(data_subset, color_by = "clusters")

data_subset <- HSMM_myo['PAXXG086750',]

p3<-plot_genes_in_pseudotime(data_subset, color_by = "clusters")

data_subset <- HSMM_myo['PAXXG345890',]

p4<-plot_genes_in_pseudotime(data_subset, color_by = "clusters")

data_subset <- HSMM_myo['PAXXG010560',]

p5<-plot_genes_in_pseudotime(data_subset, color_by = "clusters")

data_subset <- HSMM_myo['PAXXG074500',]

p6<-plot_genes_in_pseudotime(data_subset, color_by = "clusters") #color_by可以换成state或者pseudotime

pearplot <- plot_grid(p1,p2,p3,p4,p5,p6,align = "v",axis = "b",ncol = 2)

ggsave("gene_pseudotime1.pdf", plot = pearplot, width = 10, height = 15)

#拟时相关基因聚类热图

disp_table <- dispersionTable(HSMM_myo)

disp.genes <- subset(disp_table, mean_expression >= 0.5&dispersion_empirical >= 1*dispersion_fit)

disp.genes <- as.character(disp.genes$gene_id)

mycds_sub <- HSMM_myo[disp.genes,]

diff_test <- differentialGeneTest(HSMM_myo[disp.genes,], cores = 4,

                                  fullModelFormulaStr = "~sm.ns(Pseudotime)")

sig_gene_names <- row.names(subset(diff_test, qval < 1e-04))

pdf("pseudotime_heatmap1.pdf", width = 10, height = 8)

plot_pseudotime_heatmap(HSMM_myo[sig_gene_names,], num_clusters=6,

                            show_rownames=F)

dev.off()

但是,通篇用下来,好像STEEL只是做了个聚类,还没仔细看paper,不确定STEEL的聚类优于SEURAT等其它方法强的地方。

上一篇下一篇

猜你喜欢

热点阅读