单细胞聚类批次效应量化_标签匀度
2023-05-10 本文已影响0人
倪桦
推荐指标
1、LISI脚本
library(Seurat);library(SeuratData);library(dplyr);library(ggplot2)
data("pbmc3k")
cur_seu <- pbmc3k %>% NormalizeData() %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA() %>% RunUMAP(dims = 1:10)
cur_seu$batch <- sample(letters[1:10],size = ncol(cur_seu),replace = T)
X <- cur_seu@reductions$pca@cell.embeddings
lisi_index <- lisi::compute_lisi(X,meta_data = cur_seu@meta.data, label_colnames="batch",perplexity = 50)
t.test(lisi_index[[1]])$conf.int ### 均值置信区间
p_dim <- DimPlot(cur_seu,group.by = "batch") + labs(title = "")
p_lisi <- lisi_index %>%
ggplot() +
geom_boxplot(aes(x = 1,y = batch),notch = T,width = 0.4,outlier.color = "orange",outlier.shape = 8,outlier.alpha = .7) +
geom_rect(data = lisi_index %>% rstatix::get_summary_stats(),aes(xmin = 1-0.2, xmax = 1+ 0.2, ymin = q1, ymax = median), fill = "#008AA7", color = "black") +
geom_rect(data = lisi_index %>% rstatix::get_summary_stats(),aes(xmin = 1-0.2, xmax = 1+ 0.2, ymin = median, ymax = q3), fill = "#A2EFFD", color = "black") +
geom_point(data = lisi_index %>% rstatix::get_summary_stats(),aes(x =1,y = mean),shape = 8,size = 3) +
scale_y_continuous(position = "right") + theme_classic() + coord_flip() +
theme(axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
plot.margin = margin(l = 52,r = 50,t = 10,b = 10),
panel.border = element_rect(fill = NA,colour = NA)) +
labs(y = "iLISI", x = "")
cowplot::plot_grid(p_lisi,p_dim,ncol = 1,rel_heights = c(0.15,0.7),rel_widths = c(0.5,0.2))
2、ARI脚本
cur_seu <- pbmc3k[,!is.na(pbmc3k$seurat_annotations)]
cur_seu <- cur_seu %>% NormalizeData() %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA() %>% RunUMAP(dims = 1:10)
cur_seu <- cur_seu %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.5)
cur_seu$batch <- sample(1:20,size = ncol(cur_seu),replace = T)
### 当ARI指数为负数时,说明聚类结果比随机分配还要差,这通常是由于聚类结果与真实标签之间的一致性非常低所导致的,
ari_info <- GeometricMorphometricsMix::adjRand_test(cur_seu$seurat_clusters, cur_seu$batch) ### H0: 每个簇内样本标记是随机混乱的
# aricode::ARI(cur_seu$seurat_clusters, cur_seu$batch)
# entropy_lst <- aricode::entropy(cur_seu$seurat_clusters, cur_seu$batch) ### 其它量化指标,相互熵
https://cran.r-project.org/web/packages/distributions3/vignettes/one-sample-t-confidence-interval.html
https://rdrr.io/github/fruciano/GeometricMorphometricsMix/man/adjRand_test.html
https://github.com/jchiquet/aricode
https://carmonalab.github.io/STACAS.demo/STACAS.demo.html
https://github.com/immunogenomics/LISI
https://broadinstitute.github.io/2019_scWorkshop/batch-effects.html