复现2：AD与Normal细胞类型水平的差异基因挖掘(snRNA

2021-08-24 本文已影响0人小贝学生信

Single-nucleus transcriptome analysis reveals dysregulation of angiogenic endothelial cells and neuroprotective glia in Alzheimer's disease
PMID: 32989152 | 2020 Oct 13
IF : 11.202 | Proc Natl Acad Sci USA(PNAS)

overall design

文献思路：对AD和Normal来源的前额皮质组织进行snRNA-seq；进行细胞类型注释，然后基于细胞类型的水平，对AD和Normal进行差异分析。
1-3

Paper1B-D 细胞类型注释

2-3

Paper2A-D 基因细胞类型，AD与Normal的比较
3-3
Paper3A-D 细胞类型的深入挖掘(以Astro为例)
图表复现思路
（1）原始数据预处理，读为Seurat对象
（2）质控、过滤
（3）标准化、高变基因、归一化、降维(PCA、UMAP)、分群
（4）细胞类型注释
（5）基于细胞类型的AD/NC差异分析
（6）以Astro为例的细胞再聚类深入分析

提示：因为本次数据集涉及17W~细胞，所以使用的是服务器Rstudio，本地电脑比较困难。

1、读入数据

1.1 下载数据(GSE157827)--上传到服务器--批量改名(`Read10X()`)--创建Seurat对象

也可以在服务器中，直接使用 wget命令下载：https://ftp.ncbi.nlm.nih.gov/geo/series/GSE157nnn/GSE157827/suppl/

setwd("~/scAD")
fs=list.files('./GSE157827_RAW/','^GSM')
fs
library(tidyverse)
samples=str_split(fs,'_',simplify = T)[,1]
#批量将文件名改为 Read10X()函数能够识别的名字
lapply(unique(samples),function(x){
  # x = unique(samples)[1]
  y=fs[grepl(x,fs)]
  folder=paste0("GSE157827_RAW/", paste(str_split(y[1],'_',simplify = T)[,1:2], collapse = "_"))
  dir.create(folder,recursive = T)
  #为每个样本创建子文件夹
  file.rename(paste0("GSE157827_RAW/",y[1]),file.path(folder,"barcodes.tsv.gz"))
  #重命名文件，并移动到相应的子文件夹里
  file.rename(paste0("GSE157827_RAW/",y[2]),file.path(folder,"features.tsv.gz"))
  file.rename(paste0("GSE157827_RAW/",y[3]),file.path(folder,"matrix.mtx.gz"))
})
library(Seurat)
samples=list.files("GSE157827_RAW/")
samples
dir <- file.path('./GSE157827_RAW',samples)
names(dir) <- samples
#创建Seurat对象
counts <- Read10X(data.dir = dir)
scRNA = CreateSeuratObject(counts)
dim(scRNA)   #查看基因数和细胞总数
#[1]  33538 179392
table(scRNA@meta.data$orig.ident)  #查看每个样本的细胞数
head(scRNA@meta.data)

1.2 metadata细节整理

scRNA$orig.ident = stringr::str_split(rownames(scRNA@meta.data), "_",simplify = T)[,2]
# AD、NC
scRNA$group = substr(scRNA$orig.ident,1,2)
head(scRNA@meta.data)
table(scRNA$group)
table(scRNA$orig.ident)
#为了绘图直观，factor因子化，并且设置指定的levels排序
ranks = order(as.numeric(substr(unique(scRNA$orig.ident),3,4)))
scRNA$orig.ident = factor(scRNA$orig.ident, levels = unique(scRNA$orig.ident)[ranks])
table(scRNA$orig.ident, scRNA$group)

2、质控、过滤低质量的细胞、基因

2.1 过滤前的指标可视化

#指标1：nFeature_RNA 表达基因数
feats <- c("nFeature_RNA", "nCount_RNA")
library(patchwork)
p_filt_b_1=VlnPlot(scRNA, features = feats, pt.size = 0, ncol = 2, 
        group.by = "group") + NoLegend()
p_filt_b_2=VlnPlot(scRNA, features = feats, pt.size = 0, ncol = 1, 
        group.by = "orig.ident") + NoLegend()
#指标2~3：线粒体、核糖体基因含量
mito_genes=rownames(scRNA)[grep("^MT-", rownames(scRNA))] ;mito_genes 
scRNA=PercentageFeatureSet(scRNA, "^MT-", col.name = "percent_mito")
fivenum(scRNA@meta.data$percent_mito)
ribo_genes=rownames(scRNA)[grep("^Rp[sl]", rownames(scRNA),ignore.case = T)];ribo_genes
scRNA=PercentageFeatureSet(scRNA, "^RP[SL]", col.name = "percent_ribo")
fivenum(scRNA@meta.data$percent_ribo)
feats <- c("percent_mito","percent_ribo")
p_filt_b_3 =VlnPlot(scRNA, features = feats, pt.size = 0, ncol = 2, 
        group.by = "group") + NoLegend()
p_filt_b_4=VlnPlot(scRNA, features = feats, pt.size = 0, ncol = 1, 
        group.by = "orig.ident") + NoLegend()
(p_filt_b_1 / p_filt_b_2) | (p_filt_b_3 / p_filt_b_4)

2.2 确定过滤细胞、基因的阈值

细胞的相关阈值

#细胞-表达基因数-筛掉过高、过低
retained_c_umi_low <- scRNA$nFeature_RNA > 300
retained_c_umi_high <- scRNA$nFeature_RNA < 8000
#细胞-线粒体/核糖体含量-筛掉过低
retained_c_mito <- scRNA$percent_mito < 14
retained_c_ribo <- scRNA$percent_ribo < 3
table(retained_c_mito & retained_c_ribo & retained_c_umi_low & retained_c_umi_high)
# FALSE   TRUE 
# 9876 169516
table(scRNA$group[retained_c_mito & retained_c_ribo & retained_c_umi_low & retained_c_umi_high])
# AD    NC 
# 90480 79036

#指标可视化
#devtools::install_github("gaospecial/ggVennDiagram")
veen_eles = list(`Filt_low_umi\n(300)` = umi_low,
                 `Filt_high_umi\n(8000)` = umi_high,
                 `Filt_high_mito\n(14%)` = mito,
                 `Filt_high_ribo\n(3%)` = ribo)
library("ggVennDiagram")
ggVennDiagram(veen_eles, set_size = 4)  +
  ggplot2::scale_fill_gradient(low="#ffffb2",high = "#f03b20")

值得注意的是：snRNA-seq与scRNA-seq分析过程中最主要的不同便在于线粒体与核糖体比例。因为前者测的仅为核基因数据，所以理论上很少有核糖体、线粒体相关基因，因此需要过滤掉高表达核糖体、线粒体相关基因。

基因的相关阈值

feature_rowsum = Matrix::rowSums(scRNA@assays$RNA@counts>0) 
head(feature_rowsum)
retained_f_low <- feature_rowsum > ncol(scRNA)*0.005
table(retained_f_low)
# FALSE  TRUE 
# 16262 17276
rankplot = data.frame(feature_count = sort(feature_rowsum),
                          gene = names(sort(retained_f_low)),
                          Rank = 1:length(feature_rowsum))
library(ggbreak)
ggplot(rankplot, aes(x=Rank, y=feature_count)) + 
  geom_point() + scale_y_break(c(10000,100000)) +
  geom_hline(yintercept=ncol(scRNA)*0.005, color="red") +
  geom_text(x=10000, y=4000, label="Feature cutoff : ncol(scRNA)*0.5%", size = 5)

根据上面的阈值进行过滤

scRNA_filt = scRNA[retained_f_low, retained_c_mito & retained_c_ribo & 
                                    retained_c_umi_low & retained_c_umi_high]
dim(scRNA_filt)
#[1]  17276 169516
table(scRNA_filt$group)
# AD    NC 
# 90480 79036

2.3 过滤后的指标可视化

feats <- c("nFeature_RNA", "nCount_RNA")
p_filt_a_1=VlnPlot(scRNA_filt, features = feats, pt.size = 0, ncol = 2, 
        group.by = "group") + NoLegend()
p_filt_a_2=VlnPlot(scRNA_filt, features = feats, pt.size = 0, ncol = 1, 
        group.by = "orig.ident") + NoLegend()
feats <- c("percent_mito","percent_ribo")
p_filt_a_3=VlnPlot(scRNA_filt, features = feats, pt.size = 0, ncol = 2, 
        group.by = "group") + NoLegend()
p_filt_a_4=VlnPlot(scRNA_filt, features = feats, pt.size = 0, ncol = 1, 
        group.by = "orig.ident") + NoLegend()
(p_filt_a_1 / p_filt_a_2) | (p_filt_a_3 / p_filt_a_4)

2.4 保存过滤后的数据（optional）

# saveRDS(scRNA_filt, file = "sce_filt.rds")
# scRNA = readRDS("sce_filt.rds")

3、标准化、高变基因、归一化、降维(PCA、UMAP)、分群

3.1 标准化、高变基因、归一化

由于数据量比较大，ScaleData()函数比较耗时，尤其是加上回归参数。可使用future包的plan()函数设置线程数，详见https://satijalab.org/seurat/archive/v3.0/future_vignette.html
推荐使用四个线程，此外如果在FindIntegrationAnchors()步骤使用报错，设置options(future.globals.maxSize = 1000 * 1024^2) #1G plan()即可

library(future)
plan()
plan("multiprocess", workers = 4)
plan()

scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize", scale.factor = 10000)
scRNA <- FindVariableFeatures(scRNA, selection.method = "vst", nfeatures = 2000) 
scRNA <- ScaleData(scRNA, features = VariableFeatures(scRNA), 
                   vars.to.regress = c("nFeature_RNA","percent_mito"))

3.2 降维、分群

scRNA <- RunPCA(scRNA, features = VariableFeatures(scRNA)) 
scRNA = RunUMAP(scRNA, dims = pc.num)
#根据group，展示dimplot
p_dim = DimPlot(scRNA, group.by = "group")
scRNA <- FindNeighbors(scRNA, dims = pc.num) 
scRNA <- FindClusters(scRNA, resolution = c(0.01,0.05,0.1,0.2,0.5))
#不同分辨率的分群效果
p_cutree = clustree(scRNA@meta.data, prefix = "RNA_snn_res.")
Idents(scRNA) = scRNA$RNA_snn_res.0.05
table(scRNA@active.ident)
p_dim_1 = DimPlot(scRNA, label = T)

p_dim | p_cutree | p_dim_1

4、细胞类型注释

4.1 逐一展示每个细胞类型的marker gene dotplot，注释cluster

# astrocytes: AQP4, ADGRV1, GPC5, RYR3
# endothelial cells: CLDN5, ABCB1, EBF1
# excitatory neurons: CAMK2A, CBLN2, LDB2
# inhibitory neurons: GAD1, LHFPL3, PCDH15
# microglia: C3, LRMDA, DOCK8
# oligodendrocytes: MBP, PLP1, ST18
astrocytes = c("AQP4", "ADGRV1", "GPC5", "RYR3")
DotPlot(scRNA, features = astrocytes, 
        assay = "RNA") # 2

endothelial = c("CLDN5", "ABCB1", "EBF1")
DotPlot(scRNA, features = endothelial, 
        assay = "RNA") # 10

excitatory = c("CAMK2A", "CBLN2", "LDB2")
DotPlot(scRNA, features = excitatory, 
        assay = "RNA") # 0,6,8,9,11

inhibitory = c("GAD1", "LHFPL3", "PCDH15")
DotPlot(scRNA, features = inhibitory, 
        assay = "RNA") # 3,4,5

microglia = c("C3", "LRMDA", "DOCK8")
DotPlot(scRNA, features = microglia, 
        assay = "RNA") # 7

oligodendrocytes = c("MBP", "PLP1", "ST18")
DotPlot(scRNA, features = oligodendrocytes, 
        assay = "RNA") # 1

合并dotplot

gene_list = list(
  Astro = astrocytes,
  Endo = endothelial,
  Excit = excitatory,
  Inhib = inhibitory,
  Mic = microglia,
  Oligo = oligodendrocytes
)
# 调整cluster的level因子顺序，从而较美观的展示dotplot
scRNA@active.ident = factor(scRNA@active.ident, levels = c(2,10,0,6,8,9,11,3,4,5,7,1))
p_dot_1 = DotPlot(scRNA, features = gene_list, 
        assay = "RNA") +
  theme(axis.text.x = element_text(angle = 60, vjust = 0.8, hjust=0.8))

p_dim_1 | p_dot_1

4.2 根据上述结果，注释细胞类型

scRNA$celltype = ifelse(scRNA@active.ident %in% c(2), "Astro",
                        ifelse(scRNA@active.ident %in% c(10), "Endo",
                               ifelse(scRNA@active.ident %in% c(7), "Mic",
                                      ifelse(scRNA@active.ident %in% c(1), "Oligo",
                                             ifelse(scRNA@active.ident %in% c(3,4,5), "Inhib",
                                                    "Excit")))))
table(scRNA$celltype)
Idents(scRNA) = scRNA$celltype
scRNA@active.ident = factor(scRNA@active.ident, levels = c("Astro","Endo","Excit","Inhib","Mic","Oligo"))
table(scRNA@active.ident)
p_dot_2= DotPlot(scRNA, features = gene_list, 
             assay = "RNA") +
  theme(axis.text.x = element_text(angle = 60, vjust = 0.8, hjust=0.8))
p_dim_2= DimPlot(scRNA, label = T)
p_dim_2 | p_dot_2

4.3 复现paper1B-D

#Fig 1B
fig_1b=DimPlot(scRNA,  cols = c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02'))

#Fig 1C
ct_stat = table(scRNA$celltype)
ct_stat = data.frame(celltype = names(ct_stat),
                     total = as.numeric(ct_stat),
                     percentage = ct_stat/sum(ct_stat))
library(ggplot2)
fig_1c=ggplot(ct_stat, aes(x=celltype, fill=celltype, y = percentage.Freq)) +
  geom_bar(stat="identity",color = "black")  + NoLegend() +
  scale_fill_manual(values=c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02')) +
  theme_classic() + theme(axis.title.x = element_blank()) +
  ylab("Proportion of total nuclei(%)")

#Fig 1D
#Downsample 5000个细胞做Fingallmarker从而节省时间
table(scRNA@active.ident)
CTs = as.character(unique(scRNA@active.ident))
sampled = unlist(lapply(CTs, function(x){
            #x = CTs[1]
            index = scRNA@active.ident %in% x
            cells = names(scRNA@active.ident[index])
            if(length(cells) > 5000){
              sle_cell = sample(cells, 5000)
            } else {
              sle_cell = cells
            }
          }))

scRNA_sample = scRNA[ ,sampled]
dim(scRNA_sample)
table(scRNA_sample@active.ident)
#diff.wilcox = FindAllMarkers(scRNA_sample)
library(tidyverse)
all.markers = diff.wilcox %>% select(gene, everything()) %>%
  subset(p_val<0.05 & abs(diff.wilcox$avg_log2FC) > 0.5)
top5 = all.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
top5 = CaseMatch(search = as.vector(top5$gene), match = rownames(scRNA_sample)) 
#save(top5, file = "top5.rda")
#load("top5.rda")
fig_1d = DoHeatmap(scRNA_sample, features = top5, angle = 0,
                   group.colors = c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02')) 

#合并
((fig_1b + labs(tag = "B"))+ (fig_1c +  labs(tag = "C"))  ) / (fig_1d +  labs(tag = "D"))

5、基于细胞类型的AD/NC差异分析（Fig 5A-D复现）

#Fig 2a
fig_2a=DimPlot(scRNA, split.by = "group", cols = c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02'))

#Fig 2b
ct_stat2 = as.data.frame(table(scRNA$celltype, scRNA$group))
sums = rep(c(sum(ct_stat2$Freq[1:6]),sum(ct_stat2$Freq[7:12])),each=6)
ct_stat2$percentage = ct_stat2$Freq/sums

ct_stat2$Var2 = factor(ct_stat2$Var2, levels = c("NC","AD"))
library(ggplot2)
fig_2b=ggplot(ct_stat2, aes(x=Var1, fill=Var2, y = percentage)) +
  geom_bar( position=position_dodge(width=0.8), width=0.6,
            stat="identity",color = "black")  + 
  scale_fill_manual(values=c('white','black')) +
  theme_classic() + ylab("Proportion of total nuclei(%)") + 
  theme(axis.title.x = element_blank(), legend.title = element_blank())

每种细胞类型的差异分析

library(future)
plan()
plan("multiprocess", workers = 4)
plan()

head(scRNA@meta.data)
head(scRNA@meta.data[,c("group","celltype")])
scRNA$compare = paste(scRNA$group, scRNA$celltype, sep = "_")
table(scRNA$compare)
# AD_Astro  AD_Endo AD_Excit AD_Inhib   AD_Mic AD_Oligo 
# 10101     1621    32277    19271     4209    23001 
# NC_Astro  NC_Endo NC_Excit NC_Inhib   NC_Mic NC_Oligo 
# 8782      598    30050    18021     3714    17871 

ct = levels(scRNA@active.ident)
all_markers = lapply(ct, function(x){
              # x = ct[1]
              print(x)
              markers <- FindMarkers(scRNA, group.by = "compare",
                                     logfc.threshold = 0.1,
                                     ident.1 = paste("AD",x,sep = "_"),
                                     ident.2 = paste("NC",x,sep = "_"))
              #markers_sig <- subset(markers, p_val_adj < 0.1)
              return(markers)
            })
length(all_markers)
names(all_markers) = ct
lapply(all_markers,nrow)

根据文章提供的阈值，确认差异基因：
compared the individual cell-type between AD and NC samples.
adjusted P < 0.1, log 2 fold change ≥ 0.1 or ≤ −0.1

all_markers_sig = lapply(all_markers, function(x){
  markers_sig <- subset(x, p_val_adj < 0.1)
  #markers_sig <- subset(x, p_val < 0.0001)
})
marker_stat = as.data.frame(lapply(all_markers_sig,function(x){
  # x=all_markers_sig[[1]]
  Up = sum(x$avg_log2FC>0)
  Down = sum(x$avg_log2FC<0)
  Total = length(x$avg_log2FC)
  return(c(Up, Down, Total))
}))
rownames(marker_stat) = c("Up","Down","Total")
marker_stat
library(gridExtra)
fig_2c = ggpubr::as_ggplot(tableGrob(marker_stat, theme = ttheme_default(base_size = 25)))

Fig 2D Veen图
难点：venn.diagram转为ggplot对象，以便于拼图

names(all_markers_sig)
Astro = rownames(all_markers_sig$Astro)
Endo = rownames(all_markers_sig$Endo)
`Excit+Inhib` = unique(c(rownames(all_markers_sig$Excit),
                  rownames(all_markers_sig$Inhib)))
Mic = rownames(all_markers_sig$Mic)
Oligo = rownames(all_markers_sig$Oligo)
veen_eles2 = list(Astro=Astro, Endo=Endo, 
                  `Excit+Inhib`=`Excit+Inhib`,
                  Mic=Mic, Oligo=Oligo)
library(VennDiagram)
library(ggplotify)
library(gridGraphics)
plt=venn.diagram(veen_eles2, filename=NULL)
p_tmp <- grobTree(plt)
fig_2d=as.ggplot(plot_grid(p_tmp))

合并

(fig_2a + fig_2b)/(fig_2c + fig_2d) + plot_annotation(tag_levels = 'A')

6、以Astro为例的细胞再聚类深入分析（Fig 4A-D）

Astro细胞类型的clsuter再聚类→分为三大类：
(1) Astro细胞的AD上调基因相对高表达的clusters
(2) Astro细胞的AD下调基因相对高表达的clusters
(3) 其它clusters
对前两大类cluster进行差异分析，以及富集分析（有点绕，可参看原文）
首先对Astro细胞类型的clsuter，重新分析

library(future)
plan()
plan("multiprocess", workers = 4)
plan()
#subcluster analysis
scRNA_astro = subset(scRNA, celltype == "Astro")
dim(scRNA_astro)
scRNA_astro <- NormalizeData(scRNA_astro, normalization.method = "LogNormalize", scale.factor = 10000)
scRNA_astro <- FindVariableFeatures(scRNA_astro, selection.method = "vst", nfeatures = 2000) 
scRNA_astro <- ScaleData(scRNA_astro, features = VariableFeatures(scRNA_astro), 
                   vars.to.regress = c("nFeature_RNA","percent_mito"))
scRNA_astro <- RunPCA(scRNA_astro, features = VariableFeatures(scRNA_astro)) 
ElbowPlot(scRNA_astro, ndims=30, reduction="pca") 
pc.num=1:30
scRNA_astro = RunUMAP(scRNA_astro, dims = pc.num)
DimPlot(scRNA_astro, group.by = "group")
DimPlot(scRNA_astro, split.by = "group")

scRNA_astro <- FindNeighbors(scRNA_astro, dims = pc.num)
scRNA_astro <- FindClusters(scRNA_astro, resolution = c(0.01,0.05,0.1,0.2,0.3,0.4,0.5))
clustree(scRNA_astro@meta.data, prefix = "RNA_snn_res.")
#选择resolution 0.3的分群结果
scRNA_astro$sle_reso = as.numeric(scRNA_astro$RNA_snn_res.0.3)
scRNA_astro$sle_reso = paste0("a",scRNA_astro$sle_reso)
table(scRNA_astro$sle_reso)
scRNA_astro$sle_reso = factor(scRNA_astro$sle_reso, levels = paste0("a",1:12))
Idents(scRNA_astro) = scRNA_astro$sle_reso
table(scRNA_astro@active.ident)
scRNA_astro$group = factor(scRNA_astro$group, levels = c("NC","AD"))
fig_4a=DimPlot(scRNA_astro, split.by = "group")
p_sub_dim = DimPlot(scRNA_astro, label = T)

根据上述结果，以及对应paper，分为3大类细胞

scRNA_astro$subpop = ifelse(scRNA_astro@active.ident %in% c("a1","a5"), "Up",
                            ifelse(scRNA_astro@active.ident %in% c("a4","a3","a11"), 
                                   "Down","No change"))
table(scRNA_astro$subpop)
scRNA_astro$subpop = factor(scRNA_astro$subpop, levels = c("Up","Down","No change"))
fig_4b=DimPlot(scRNA_astro, group.by = "subpop", cols = c("#e41a1c","#4daf4a","#d9d9d9"))

subcluster DEG

marker_Astro <- FindMarkers(scRNA_astro, group.by = "subpop",
                       ident.1 = "Down",
                       ident.2 = "Up")
marker_Astro = marker_Astro[order(marker_Astro$avg_log2FC, decreasing = T),]
head(marker_Astro)
scRNA_astro_sub = subset(scRNA_astro, subpop != "No change")
scRNA_astro_sub$subpop = factor(scRNA_astro_sub$subpop, levels = c("Down","Up"))
library(future)
plan()
plan("multiprocess", workers = 4)
plan()
scRNA_astro_sub <- ScaleData(scRNA_astro_sub, features = rownames(marker_Astro), 
                   vars.to.regress = c("nFeature_RNA","percent_mito"))

#Doheatmap结果中，只展示部分感兴趣(与paper重合)的基因注释
paper = c("GLUD1","GLUL","PTN","TNIK","SLC4A4","GABRA2","VEGFA","ETNPPL","SLC1A3",
          "TUBB2B","SPARCL1","HES1","WIF1","HES5","APOE","SLC1A2","F3")
table(paper %in% rownames(marker_Astro)[1:100])
genes.to.label = paper[paper %in% rownames(marker_Astro)[1:100]]
labels <- rep(x = "transparent", times = length(x = rownames(marker_Astro)[1:100]))
labels[match(x = genes.to.label, table = rownames(marker_Astro)[1:100])] <- "black"
fig_4c=DoHeatmap(object = scRNA_astro_sub, features = rownames(marker_Astro)[1:100], 
          group.by = "subpop", group.colors = c("#4daf4a","#e41a1c"), angle = 0) +
  theme(axis.text.y = element_text(color = rev(x = labels)))
fig_4c

差异基因的富集分析结果

library(clusterProfiler)
library(org.Hs.eg.db)
ego <- enrichGO(gene          = rownames(marker_Astro)[1:200],
                keyType       = "SYMBOL",
                OrgDb         = org.Hs.eg.db,
                ont           = "BP",
                pAdjustMethod = "BH",
                pvalueCutoff  = 1,
                qvalueCutoff  = 1)
res = ego@result
interest_go = which(rownames(ego@result) %in% c("GO:0021953","GO:0050769","GO:0021872","GO:0021879",
                "GO:0007409","GO:0042552","GO:0008366","GO:0030900")
interest_go=ego@result$Description[interest_go]
fig_4d=barplot(ego, showCategory=interest_go, cols="pvalue")

合并

(fig_4a + fig_4b)/(fig_4c + fig_4d) + plot_layout(widths = c(2,1)) + 
  plot_annotation(tag_levels = 'A')

复现2：AD与Normal细胞类型水平的差异基因挖掘(snRNA

1、读入数据

1.1 下载数据(GSE157827)--上传到服务器--批量改名(`Read10X()`)--创建Seurat对象

1.2 metadata细节整理

2、质控、过滤低质量的细胞、基因

2.1 过滤前的指标可视化

2.2 确定过滤细胞、基因的阈值

2.3 过滤后的指标可视化

2.4 保存过滤后的数据（optional）

3、标准化、高变基因、归一化、降维(PCA、UMAP)、分群

3.1 标准化、高变基因、归一化

3.2 降维、分群

4、细胞类型注释

4.1 逐一展示每个细胞类型的marker gene dotplot，注释cluster

4.2 根据上述结果，注释细胞类型

4.3 复现paper1B-D

5、基于细胞类型的AD/NC差异分析（Fig 5A-D复现）

6、以Astro为例的细胞再聚类深入分析（Fig 4A-D）

猜你喜欢

热点阅读

复现2：AD与Normal细胞类型水平的差异基因挖掘(snRNA

1、读入数据

1.1 下载数据(GSE157827)--上传到服务器--批量改名(Read10X())--创建Seurat对象

1.2 metadata细节整理

2、质控、过滤低质量的细胞、基因

2.1 过滤前的指标可视化

2.2 确定过滤细胞、基因的阈值

2.3 过滤后的指标可视化

2.4 保存过滤后的数据（optional）

3、标准化、高变基因、归一化、降维(PCA、UMAP)、分群

3.1 标准化、高变基因、归一化

3.2 降维、分群

4、细胞类型注释

4.1 逐一展示每个细胞类型的marker gene dotplot，注释cluster

4.2 根据上述结果，注释细胞类型

4.3 复现paper1B-D

5、基于细胞类型的AD/NC差异分析（Fig 5A-D复现）

6、以Astro为例的细胞再聚类深入分析（Fig 4A-D）

猜你喜欢

热点阅读

1.1 下载数据(GSE157827)--上传到服务器--批量改名(`Read10X()`)--创建Seurat对象