scRNAseq

ggplot2绘制单细胞经典火山图

2022-10-31  本文已影响0人  Hayley笔记

单细胞绘图系列:


1. 计算差异基因

用pbmc数据集做演示

library(Seurat)
library(patchwork)
library(clusterProfiler)
library(org.Mm.eg.db) ##加载小鼠
library(org.Hs.eg.db) ##加载人类
library(tidyverse)
pbmc <-readRDS("pbmc.rds")
table(pbmc$cell_type)
deg_all <- FindMarkers(pbmc, ident.1 = 'Memory CD4 T',ident.2 = 'Naive CD4 T', 
                              group.by = 'cell_type',logfc.threshold = 0,min.pct = 0,pseudocount.use = 0.01)
#sig_dge.all <- subset(deg_all, p_val_adj<0.05&abs(avg_log2FC)>0.15) #所有差异基因
#View(dge_all)
2. 定义绘图函数
VolcanoPlot=function(dif, log2FC=log2(1.5), padj=0.05, 
                 label.symbols=NULL, label.max=30,
                 cols=c("#497aa2", "#ae3137"), title=""){
  if( all( !c("log2FoldChange", "padj", "symbol") %in% colnames(dif) )){
    stop("Colnames must include: log2FoldChange, padj, symbol")
  }
  rownames(dif)=dif$symbol
  
  # (1) define up and down
  dif$threshold="ns";
  dif[which(dif$log2FoldChange > log2FC & dif$padj <padj),]$threshold="up";
  dif[which(dif$log2FoldChange < (-log2FC) & dif$padj < padj),]$threshold="down";
  dif$threshold=factor(dif$threshold, levels=c('down','ns','up'))
  #head(dif)
  #
  tb2=table(dif$threshold); print(tb2)
  library(ggplot2)
  # (2) plot
  g1 = ggplot(data=dif, aes(x=log2FoldChange, y=-log10(padj), color=threshold)) +
    geom_point(alpha=0.8, size=0.8) +
    geom_vline(xintercept = c(-log2FC, log2FC), linetype=2, color="grey")+
    geom_hline(yintercept = -log10(padj), linetype=2, color="grey")+
    labs(title= ifelse(""==title, "", paste("DEG:", title)))+
    xlab(bquote(Log[2]*FoldChange))+
    ylab(bquote(-Log[10]*italic(P.adj)) )+
    theme_classic(base_size = 14) +
    theme(legend.box = "horizontal",
          legend.position="top",
          legend.spacing.x = unit(0, 'pt'),
          legend.text = element_text( margin = margin(r = 20) ),
          legend.margin=margin(b= -10, unit = "pt"),
          plot.title = element_text(hjust = 0.5, size=10)
          ) +
    scale_color_manual('',labels=c(paste0("down(",tb2[[1]],')'),'ns',
                                   paste0("up(",tb2[[3]],')' )),
                       values=c(cols[1], "grey", cols[2]) )+
    guides(color=guide_legend(override.aes = list(size=3, alpha=1))); g1;
  # (3)label genes
  if(is.null(label.symbols)){
    dif.sig=dif[which(dif$threshold != "ns" ), ]
    len=nrow(dif.sig)
    if(len<label.max){
      label.symbols=rownames(dif.sig)
    }else{
      dif.sig=dif.sig[order(dif.sig$log2FoldChange), ]
      dif.sig= rbind(dif.sig[1:(label.max/2),], dif.sig[(len-label.max/2):len,])
      label.symbols=rownames(dif.sig)
    }
  }
  dd_text = dif[label.symbols, ]
  print((dd_text))
  # add text
  library(ggrepel)
  g1 + geom_text_repel(data=dd_text,
                       aes(x=log2FoldChange, y=-log10(padj), label=row.names(dd_text)),
                         #size=2.5, 
                         colour="black",alpha=1)
}
3. 构建中间数据 dif
dif=data.frame(
  symbol=rownames(deg_all),
  log2FoldChange=deg_all$avg_log2FC,
  padj=deg_all$p_val_adj
)
4. 画图
# 可以指定要标记的DEG数量,选出FC最大和最小的基因标记
p1 = VolcanoPlot(dif, padj=0.05, title="DSS vs WT", label.max = 50)
# 自定义颜色
p2 = VolcanoPlot(dif, padj=0.05, title="DSS vs WT", label.max = 50, cols=c("blue", "red"))


# 也可以指定要标记的基因名字
p3 = VolcanoPlot(dif, padj=1e-10, title="DSS vs WT -2", 
            label.symbols=dif[ ((abs(dif$log2FoldChange) > 2) & (dif$padj < 1e-50) ) | 
                                      abs(dif$log2FoldChange) > 4,]$symbol )
p1|p2|p3

来源:ggplot2 | 单细胞类间比较的火山图 - 经典效果

上一篇 下一篇

猜你喜欢

热点阅读