单细胞转录组

单细胞转录组数据分析(三):识别celltype

2021-01-04  本文已影响0人  生信学习者2

识别celltype是单细胞数据分析最关键的一步,识别方法主要分为手动和自动注释,暂时记录下手动注释的过程(极其地痛苦,如果可以还是自动注释更方便,但准确度可能不好,最佳的方法是手动+自动的方式)。更多知识分享请到 https://zouhua.top/

加载R包

library(dplyr)
library(Seurat)
library(tibble)
library(ggplot2)
library(cowplot)

# 设置全局参数
rm(list = ls())
options(stringsAsFactors = F)
options(future.globals.maxSize = 4000 * 1024^2)

colors <- c("#A6CEE3", "#1F78B4", "#08306B", "#B2DF8A", "#006D2C", "#8E0152",
         "#DE77AE", "#CAB2D6", "#6A3D9A", "#FB9A99", "#E31A1C", "#B15928",
         "#619CFF","#FF67A4","#00BCD8", "#EE2B2B", "#2D6BB4")

加载数据

data.cluster <- readRDS("../../Result/RDS/data.cluster04.rds")

查看cluster

DimPlot(data.cluster,
        reduction = "umap",
        label = TRUE,
        label.size = 6,
        cols = colors)

识别所有cluster的marker gene

annotations <- read.csv("../../Result/annotations_mus.csv")
markers <- FindAllMarkers(object = data.cluster,
                            only.pos = TRUE,
                            logfc.threshold = 0.25)
#write.csv(markers, "../../Result/data.all.markergenes.csv")
#markers <- read.csv("../../Result/data.all.markergenes.csv", row.names = 1)

评估marker genes

anno_all_markers <- markers %>%
        left_join(y = unique(annotations[, c("gene_name", "description")]),
                  by = c("gene" = "gene_name")) %>%
        mutate(Specific=pct.1 - pct.2) #%>%
  #arrange(dplyr::desc(Specific), dplyr::desc(avg_logFC))

# Extract top 50 markers per cluster
top50 <- anno_all_markers %>%
    group_by(cluster) %>%
    top_n(n = 50, wt = avg_logFC) %>%
    ungroup()
mapal <- colorRampPalette(c("#EEEED1", "#AEDCF9", "#10106A"))(20)
DoHeatmap(object = data.cluster, features = top50$gene, label = TRUE) +
  scale_fill_gradientn(colours = mapal)
library(pheatmap)
exp_ave <- AverageExpression(data.cluster)
CorOb.cor.exp <- as.data.frame(cor(exp_ave$RNA, method = "pearson"))
pheatmap(CorOb.cor.exp, color=mapal)

网上查找组织细胞的特异基因

Marker genes Celltype
AQP1, BST2 endothelial cell
CXCL2 Fibroblasts,endothelial cell
ADRA2C,NTRK2 Fibroblasts,smooth muscle cell
RBP7,CTHRC1 endothelial cell
LUM, MMP2,SERPINF1,DCN Fibroblasts
HYAL2,CALCRL,CD320 endothelial cell
ABCC9,CPE Fibroblasts, endothelial cell
TTN cardiomyocyte
LBH,MYH11,MYLK smooth muscle cell,Fibroblasts
NKG7,KLRD1,PRF1 Natural killer cells
AIF1,MS4A6A,C3AR1 Macrophages,Monocytes
FeaturePlot(data.cluster,
            reduction = "umap",
            features = c("Cd3g", "Cd3d", "Ptprc"),
            pt.size = 0.4,
            order = TRUE,
            min.cutoff = 'q10',
            label = TRUE)

根据marker genes结果重命名cluster

data.cluster <- RenameIdents(object = data.cluster,
                               "0" = "cardiomyocyte",
                               "1" = "fibroblasts",
                               "2" = "cardiomyocyte",
                               "3" = "macrophage",
                               "4" = "cardiomyocyte",
                               "5" = "endothelial cells",
                               "6" = "cardiomyocyte",
                               "7" = "T cells",
                               "8" = "Unknown",
                               "9" = "granulocyte",
                               "10" = "Unknown",
                               "11" = "Unknown",
                               "12" = "Unknown")

DimPlot(object = data.cluster,
        reduction = "umap",
        label = TRUE,
        label.size = 5,
        repel = TRUE,
        cols = colors)

提取或丢弃某些cluster

# 提取
data.choose <- subset(data.cluster,
                      idents = c("cardiomyocyte", "macrophage"),
                      invert = FALSE)
# 删除 
data.cluster <- subset(data.cluster,
                          idents = "Unknown",
                          invert = TRUE)

Reference

参考文章如引起任何侵权问题,可以与我联系,谢谢。

上一篇 下一篇

猜你喜欢

热点阅读